#!/usr/bin/env bash
# run-sampler -- Invokes DimmWitted or equivalent Gibbs sampler for learning and inference in a uniform way
# $ cd run/model/factorgraph/0
# $ run-sampler results_dir=... weights=... [NAME=VALUE...] -- SAMPLER_CMD [SAMPLER_ARG...]
##
set -euo pipefail

# Turn every leading NAME=VALUE argument into a shell variable.  Parsing stops
# right after the first "--", which is dropped; whatever follows it (the
# sampler command and its arguments) is left in "$@".
until [[ $# -eq 0 ]]; do
    if [[ $1 == -- ]]; then
        shift
        break
    fi
    declare -- "$1"
    shift
done

# required parameters
# Each one may come from the environment or from a NAME=VALUE argument parsed
# above.  The expansions are quoted so that a value is never word-split or
# glob-expanded while it is merely being checked.
: "${DEEPDIVE_NUM_PROCESSES:?required parameter is not set}"
# Number of parallel loads defaults to the number of processes.
: "${DEEPDIVE_SAMPLER_NUM_PARALLEL_LOADS:=$DEEPDIVE_NUM_PROCESSES}"
: "${results_dir:?required parameter is not set (pass results_dir=...)}"
: "${weights:?required parameter is not set (pass weights=...)}"

# The first argument left after "--" names the sampler; the rest are passed
# through to it.
if [[ $# -eq 0 ]]; then
    # NOTE(review): `usage` is not defined in this file -- presumably a helper
    # found on PATH; confirm.  If it fails, `set -e` stops the script here with
    # its exit status, exactly as before.
    usage "$0" "Missing SAMPLER_CMD"
    # Reached only when `usage` returned success: stop explicitly instead of
    # tripping over the unset $1 below.
    exit 1
fi
SamplerCmd=$1; shift

# flatten PATH... -- Decompresses every non-empty regular file found under the
# given paths (symbolic links are followed) and writes the concatenated result
# to stdout.
flatten() {
    # pbzip2 flags: -c write to stdout, -d decompress, -k keep the input files
    local -a decompress=(pbzip2 -c -d -k)
    find -L "$@" \
        -type f -size +0 \
        -exec "${decompress[@]}" {} +
}

# Dispatch on the sampler command: numbskull gets a flattened copy of the factor
# graph on disk; anything else is driven through a DimmWitted-style command line
# that reads the compressed parts in parallel.
case $SamplerCmd in
    numbskull)
        set -x
        mkdir -p ./flat
        # Rebuild the flattened graph only when ./meta is newer than the copy
        # left by a previous run, or when no such copy exists yet.
        if [[ ./meta -nt ./flat/graph.meta ]]; then
            # TODO: save disk space
            flatten ./variables/ >./flat/graph.variables
            flatten ./factors/   >./flat/graph.factors
            flatten ./domains/   >./flat/graph.domains
            # graph.meta doubles as the freshness marker tested above, so it
            # must be written last: if any flatten step fails (the script runs
            # under `set -e`), the marker stays old or absent and the next run
            # rebuilds everything instead of trusting partial files.
            head -n 1 ./meta     >./flat/graph.meta
        fi
        # weights are flattened on every run
        flatten ./"$weights"/    >./flat/graph.weights
        numbskull ./flat                          \
            --output_dir "$results_dir"           \
            --threads "$DEEPDIVE_NUM_PROCESSES"   \
            "$@"
        ;;

    *) # assume DimmWitted-compatible command-line interface
        # mk_process_substitution_opts FLAG -- Reads file paths from stdin, one
        # per line, and prints shell source text of the form
        #      FLAG <(pbzip2 -c -d -k \
        #         PATH \
        #         ...
        #     )
        # once per group of paths.  `split` deals the paths round-robin into
        # $DEEPDIVE_SAMPLER_NUM_PARALLEL_LOADS groups, skips empty groups, and
        # pipes each group to the --filter command, whose `sed` indents every
        # path and appends a line-continuation backslash.
        # NOTE: paths are pasted into the generated text unquoted, so a path
        # containing whitespace or shell metacharacters would break the `eval`
        # below.
        mk_process_substitution_opts() {
            # list of file paths should evenly spread into the degree of parallelism
            split --number=r/"$DEEPDIVE_SAMPLER_NUM_PARALLEL_LOADS" --elide-empty-files \
                --filter='echo -n " '"$1"' <(pbzip2 -c -d -k \\'$'\n''$(sed "s/\$/ \\\\/; s/^/    /;")'$'\n'')"'
        }
        # assemble many flags for multipart inputs to load them in parallel
        opts=
        opts+=$(find -L ./variables/  -type f -size +0 | mk_process_substitution_opts --variables)
        opts+=$(find -L ./domains/    -type f -size +0 | mk_process_substitution_opts --domains  )
        opts+=$(find -L ./factors/    -type f -size +0 | mk_process_substitution_opts --factors  )
        opts+=$(find -L ./"$weights"/ -type f -size +0 | mk_process_substitution_opts --weights  )
        set -x
        # `eval` is needed so that the `<(...)` text collected in $opts is parsed
        # as process substitutions; the single-quoted parts ($SamplerCmd,
        # "$results_dir", "$@", ...) are expanded by `eval` itself.
        eval '$SamplerCmd gibbs                     \
            '"$opts"' \
            --fg_meta   ./meta                      \
            --outputFile "$results_dir"             \
            --n_threads $DEEPDIVE_NUM_PROCESSES     \
            "$@"'
esac
